"""
WeatherSponge
Copyright (c) 2008 Marlon B van der Linde <marlon@mbvdl.net>
See 'COPYING' for license

"""

import re
import sys
import urllib
from xml.etree.ElementTree import fromstring

from BeautifulSoup import BeautifulSoup

class WeatherSponge(object):
	"""
	WeatherSponge handles data extraction from an RSSWeather.com feed.

	The feed is downloaded when the object is constructed. parseWeatherData()
	turns the downloaded feed into a dictionary, and cleanValues() strips the
	units from the numeric fields of such a dictionary.

	"""

	# Keys whose values carry a unit suffix that cleanValues() removes.
	_UNIT_FIELDS = ('barometer', 'windchill', 'dewpoint', 'heatindex',
		'visibility', 'humidity', 'windspeed', 'temp')

	# First signed decimal number in a string, e.g. "-3.5" in "-3.5 C".
	_NUMBER_RE = re.compile(r'[-+]?(?:\d+(?:\.\d+)?|\.\d+)')


	def __init__(self, weatherSource, rdf='http://purl.org/rss/1.0/modules/content/'):
		"""
		Remember the feed location and download the feed immediately.

		@param weatherSource: URL of rssweather.com feed to parse
		@type weatherSource: C{str} or C{unicode}

		@param rdf: XML namespace URI under which the feed publishes its
			'encoded' element; accepted with or without surrounding braces
		@type rdf: C{str} or C{unicode}

		"""
		self.wSource = weatherSource
		self.content = None
		self.rdf = rdf
		self.__loadSource()


	def __loadSource(self):
		"""
		Loads the feed from the web, and reads the contents into self.content.

		@raise SystemExit: with status 5 when the feed cannot be opened or read
		"""
		try:
			fileInstance = urllib.urlopen(self.wSource)
			try:
				self.content = fileInstance.read()
			finally:
				# Release the connection whether or not the read succeeded.
				fileInstance.close()
		except IOError:
			print("""Oopsie. urlopen did NOT survive this exception handler""")
			self.content = None
			raise SystemExit(5)


	def _encodedTag(self):
		"""
		Build the ElementTree name of the feed's namespaced 'encoded' element.

		ElementTree addresses a namespaced element as the namespace URI wrapped
		in braces followed by the local name. A bare URI would be read as a
		path because of its slashes, so the braces are added here unless
		self.rdf already carries them.

		@return: tag name suitable for Element.find()
		@rtype: C{str} or C{unicode}
		"""
		namespace = self.rdf
		if namespace and not namespace.startswith('{'):
			namespace = '{' + namespace + '}'
		return namespace + 'encoded'


	def _childText(self, parent, tag):
		"""
		Return the text of the child element of parent named tag.

		@raise ValueError: when parent has no such child element
		"""
		child = parent.find(tag)
		if child is None:
			raise ValueError("weather feed item has no '%s' element" % tag)
		return child.text


	def parseWeatherData(self):
		"""
		Transform rssweather.com feed into properly formed data

		The result holds one entry per dt/dd pair found in the item's encoded
		HTML content (key: the dt text lower-cased, without spaces or trailing
		colon), plus the keys 'pubdate', 'pubtitle' and 'temp' taken from the
		item's pubDate and title elements.

		@return: Date of publication, station title and current weather info
		@rtype: C{dict}

		@raise ValueError: when the feed lacks an expected element or its
			dt and dd entries do not pair up

		"""
		rss = fromstring(self.content)
		item = rss.find('channel/item')
		if item is None:
			raise ValueError("weather feed has no 'channel/item' element")

		pubdate = self._childText(item, 'pubDate')
		title = self._childText(item, 'title')
		if title is None:
			raise ValueError("weather feed item has an empty 'title' element")

		tmp = {}
		tmp[u"pubdate"] = pubdate
		# strip everything out except the weather station name
		tmp[u"pubtitle"] = title.split('Weather')[0].strip()
		# strip everything from the title except the temperature
		tmp[u"temp"] = title.split('::')[-1].split('C')[0].strip()
		contentData = self._childText(item, self._encodedTag())

		contentSoup = BeautifulSoup(contentData, convertEntities=BeautifulSoup.HTML_ENTITIES)
		dts = [e.string.rstrip(':').lower().replace(' ', '') for e in contentSoup.findAll('dt')]
		dds = [e.string.strip() for e in contentSoup.findAll('dd')]

		# An explicit check rather than an assert: asserts vanish under -O and
		# zip() would then silently drop the unpaired entries.
		if len(dts) != len(dds):
			raise ValueError("weather feed has %d labels but %d values"
				% (len(dts), len(dds)))
		info = dict(zip(dts, dds))
		info.update(tmp)
		return info


	def cleanValues(self, dirtydic):
		"""
		Cleans the values of dirtydic and removes the units from them for db
		storage. Returns a new dictionary with the same keys, but without cruft;
		dirtydic itself is left untouched.

		For the keys listed in _UNIT_FIELDS the value is reduced to the first
		number it contains, keeping a leading sign and a decimal part (for
		example '30.05 in' becomes '30.05' and '-3' stays '-3'). A value
		without any number becomes an empty string, and None is passed through.
		All other keys are copied unchanged.

		@param dirtydic: dictionary containing values with cruft in them
		@type dirtydic: dict

		@return: dictionary containing cleaned values
		@rtype: dict

		"""
		cleandic = {}
		for key, value in dirtydic.items():
			if key in self._UNIT_FIELDS and value is not None:
				match = self._NUMBER_RE.search(value)
				if match:
					cleandic[key] = match.group(0)
				else:
					# Nothing numeric at all, e.g. 'N/A'.
					cleandic[key] = value[:0]
			else:
				cleandic[key] = value
		return cleandic


if __name__ == "__main__":
	# This module is a library; running it directly only prints a hint.
	print("Rather Try Importing This")

